In [1]:
# Standard library
import cmath
import math
import time

# Third-party: audio, numerics, plotting, display
import librosa
from librosa import display
from PIL import Image
from matplotlib import pyplot
import matplotlib.pyplot as plt
import numpy
import numpy as np
from numpy import asarray
from numpy.linalg import inv
import scipy
import seaborn as sns
import IPython.display as ipd

# Third-party: deep learning
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets

# Use the GPU when available; all tensors and the model are placed via this handle.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Loading the clean male voice

In [2]:
# Load the clean training speech at its native sampling rate (sr=None).
train_clean_male, sr1 = librosa.load("train_clean_male.wav", sr=None)
# 1024-point STFT with hop 512 (50% overlap) -> 513 frequency bins per frame.
S = librosa.stft(train_clean_male, n_fft=1024, hop_length=512)
# Play back at the actual loaded sampling rate instead of a hard-coded 16000.
ipd.display(ipd.Audio(train_clean_male, rate=sr1))

Loading the noisy (dirty) male voice

In [3]:
# Load the noisy training speech at its native sampling rate (sr=None).
sn, sr2 = librosa.load("train_dirty_male.wav", sr=None)
# Same STFT parameters as the clean signal so the spectrograms align frame-by-frame.
X = librosa.stft(sn, n_fft=1024, hop_length=512)
# Play back at the actual loaded sampling rate instead of a hard-coded 16000.
ipd.display(ipd.Audio(sn, rate=sr2))

Taking magnitudes of the clean voice (S) and the dirty voice (X)

In [4]:
# Magnitude spectrograms: |S| is the clean target, |X| the noisy network input.
mod_S, mod_X = np.abs(S), np.abs(X)
In [5]:
# (freq_bins, frames) of the clean magnitude spectrogram.
print(mod_S.shape)
(513, 2459)
In [6]:
# (freq_bins, frames) of the noisy magnitude spectrogram — must match mod_S.
print(mod_X.shape)
(513, 2459)

Network Creation

As part of the Network we have used the Following Architecture:

1) Convolution layer with kernel size = 2, stride = 1 and 16 filters, followed by ReLU activation.

2)Max Pooling with kernel size=2 and stride =1

3) Convolution layer with kernel size = 2, stride = 1 and 32 filters, followed by ReLU activation.

4) Max Pooling with kernel size=2 and stride =2

5) Flattening into a fully connected layer with 8160 neurons (the layer before the output layer).

6) The final layer has 513 neurons with ReLU activation, as we need non-negative values in the output.

In [7]:
class onedcnn(nn.Module):
    """1-D CNN denoiser mapping one 513-bin noisy magnitude frame to a clean one.

    Input shape : (batch, 1, 513)
    Output shape: (batch, 513), non-negative because of the final ReLU.
    """

    def __init__(self):
        super(onedcnn, self).__init__()
        # Conv(1->16, k=2, s=1) + ReLU, then MaxPool(k=2, s=1): length 513 -> 512 -> 511.
        self.layer1 = nn.Sequential(
            nn.Conv1d(1, 16, kernel_size=2, stride=1),
            nn.ReLU(),
            nn.MaxPool1d(2, stride=1))
        # Conv(16->32, k=2, s=1) + ReLU, then MaxPool(k=2, s=2): length 511 -> 510 -> 255.
        self.layer2 = nn.Sequential(
            nn.Conv1d(16, 32, kernel_size=2, stride=1),
            nn.ReLU(),
            nn.MaxPool1d(2, stride=2))
        # Flattened feature size: 32 channels * 255 samples = 8160 inputs.
        self.layer3 = torch.nn.Linear(32 * 255, 513)
        # Output ReLU keeps the predicted magnitudes non-negative.
        self.act = nn.ReLU()

    def forward(self, x):
        # Two conv stages, flatten per sample, then the fully connected head.
        features = self.layer2(self.layer1(x))
        flat = features.reshape(features.size(0), -1)
        return self.act(self.layer3(flat))
In [8]:
# Place the model on the configured device; unlike a hard-coded .cuda() call,
# this also works on CPU-only machines.
model1 = onedcnn().to(device)
# Loss and optimizer: MSE between predicted and clean magnitude frames.
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model1.parameters(), lr=0.0001)
In [9]:
# Train for 200 epochs on 128-frame mini-batches of the magnitude spectrograms.
errt=[0 for i in range(200)]  # per-epoch average loss, used for the convergence plot
for epoch in range(200):
    running_loss=0
    for j in range(20):
        # Move tensors to the configured device
        # Slice 128 STFT frames per batch; the last batch (j=19) is the
        # 27-frame remainder of the 2459 total frames.
        if (j+1)*128 <= 2459:
          images= torch.tensor(mod_X[:,j*128:(j+1)*128],device=device)
          labels=torch.tensor(mod_S[:,j*128:(j+1)*128],device=device)
        else:
          images=torch.tensor(mod_X[:,j*128:2459],device=device)
          labels = torch.tensor(mod_S[:,j*128:2459],device=device)
        
        # Forward pass
        transposed_images=torch.transpose(images, 0, 1).to(device)
        # NOTE(review): resize_ on a non-contiguous transpose view reinterprets
        # the underlying storage in its original (frequency-major) order, so each
        # (1, 513) sample may not be a true time frame — confirm; an explicit
        # .contiguous() (or reshape) before resizing would make the intent clear.
        # The test/inference cells below use the same pattern, so train and test
        # at least see a consistent layout.
        transposed_images.resize_(images.size(1),1, 513)
        outputs = model1(transposed_images).cuda()

        # print(np.shape(outputs),np.shape(torch.transpose(labels,0,1)))
        
        # Target is transposed to (frames, 513) to match the network output.
        loss = criterion(outputs.cuda(), torch.transpose(labels,0,1))
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss+=loss.item()
    
    # NOTE(review): the inner loop runs 20 batches but the sum is divided by 19,
    # so the printed "Loss" slightly overstates the true per-batch mean.
    errt[epoch]=running_loss/19
    print("Epoch:", epoch,"Loss:",running_loss/19)
Epoch: 0 Loss: 0.08580738943266242
Epoch: 1 Loss: 0.07574183165438865
Epoch: 2 Loss: 0.06950347762750952
Epoch: 3 Loss: 0.06389535233182342
Epoch: 4 Loss: 0.05926167754162299
Epoch: 5 Loss: 0.0552594408785042
Epoch: 6 Loss: 0.05164357142424897
Epoch: 7 Loss: 0.04828512913694507
Epoch: 8 Loss: 0.04512774591383181
Epoch: 9 Loss: 0.0421649276425964
Epoch: 10 Loss: 0.03939455247631198
Epoch: 11 Loss: 0.03682813224823851
Epoch: 12 Loss: 0.03431605104062902
Epoch: 13 Loss: 0.03134042394690608
Epoch: 14 Loss: 0.028981515525006933
Epoch: 15 Loss: 0.026893123543184055
Epoch: 16 Loss: 0.025017124527183018
Epoch: 17 Loss: 0.023327963500234642
Epoch: 18 Loss: 0.021826825300721747
Epoch: 19 Loss: 0.02049132782083593
Epoch: 20 Loss: 0.019298483911705643
Epoch: 21 Loss: 0.01823687901426303
Epoch: 22 Loss: 0.017287459543072863
Epoch: 23 Loss: 0.016441975303582455
Epoch: 24 Loss: 0.015685614529310873
Epoch: 25 Loss: 0.0150120076233227
Epoch: 26 Loss: 0.014408224899517862
Epoch: 27 Loss: 0.013858580483907932
Epoch: 28 Loss: 0.013368246831784123
Epoch: 29 Loss: 0.012922093247700678
Epoch: 30 Loss: 0.012523306141558447
Epoch: 31 Loss: 0.012156543726297585
Epoch: 32 Loss: 0.011828405194376645
Epoch: 33 Loss: 0.011518085755309776
Epoch: 34 Loss: 0.01118964405338231
Epoch: 35 Loss: 0.010895240549488287
Epoch: 36 Loss: 0.010623432144424632
Epoch: 37 Loss: 0.01037501694487506
Epoch: 38 Loss: 0.010160663753355803
Epoch: 39 Loss: 0.009963708770412364
Epoch: 40 Loss: 0.009768773433997444
Epoch: 41 Loss: 0.009599014761318502
Epoch: 42 Loss: 0.009442752754119666
Epoch: 43 Loss: 0.009284046968739284
Epoch: 44 Loss: 0.009139891137908163
Epoch: 45 Loss: 0.009000584691468822
Epoch: 46 Loss: 0.008885776280964675
Epoch: 47 Loss: 0.008779315743595362
Epoch: 48 Loss: 0.008672571422434166
Epoch: 49 Loss: 0.008551491301898894
Epoch: 50 Loss: 0.008445255691185594
Epoch: 51 Loss: 0.008346440711695897
Epoch: 52 Loss: 0.00825424087969096
Epoch: 53 Loss: 0.008078321973842225
Epoch: 54 Loss: 0.007590718567371368
Epoch: 55 Loss: 0.007372498598047777
Epoch: 56 Loss: 0.007220310054866499
Epoch: 57 Loss: 0.007108699885736171
Epoch: 58 Loss: 0.007033219487455331
Epoch: 59 Loss: 0.006950291130985869
Epoch: 60 Loss: 0.006862373152551682
Epoch: 61 Loss: 0.00680034274946114
Epoch: 62 Loss: 0.006734170227948772
Epoch: 63 Loss: 0.006673675906648369
Epoch: 64 Loss: 0.006627053996597074
Epoch: 65 Loss: 0.0065506678135869535
Epoch: 66 Loss: 0.006486154326825942
Epoch: 67 Loss: 0.006423939154238293
Epoch: 68 Loss: 0.006368098204563323
Epoch: 69 Loss: 0.006332144073798861
Epoch: 70 Loss: 0.006269429485607696
Epoch: 71 Loss: 0.006230446945042594
Epoch: 72 Loss: 0.006184622320640636
Epoch: 73 Loss: 0.006134013357376189
Epoch: 74 Loss: 0.006086395814475652
Epoch: 75 Loss: 0.0060456283696878116
Epoch: 76 Loss: 0.006006112781745431
Epoch: 77 Loss: 0.005983831017817322
Epoch: 78 Loss: 0.005922665844034208
Epoch: 79 Loss: 0.005881948157605764
Epoch: 80 Loss: 0.005841630860231817
Epoch: 81 Loss: 0.0058023383791901565
Epoch: 82 Loss: 0.005768442428425739
Epoch: 83 Loss: 0.00573372023523246
Epoch: 84 Loss: 0.005699392469403775
Epoch: 85 Loss: 0.0056631272401366575
Epoch: 86 Loss: 0.005627914705607844
Epoch: 87 Loss: 0.005605354359814603
Epoch: 88 Loss: 0.005562606249249687
Epoch: 89 Loss: 0.0055345155874659355
Epoch: 90 Loss: 0.005514457797337519
Epoch: 91 Loss: 0.005480242905354029
Epoch: 92 Loss: 0.0054456695522132675
Epoch: 93 Loss: 0.005423707771115005
Epoch: 94 Loss: 0.005393816311353524
Epoch: 95 Loss: 0.0053750169157099565
Epoch: 96 Loss: 0.005341388285160065
Epoch: 97 Loss: 0.005307526468593431
Epoch: 98 Loss: 0.005276466192873685
Epoch: 99 Loss: 0.005237840667465015
Epoch: 100 Loss: 0.005217324917841899
Epoch: 101 Loss: 0.005173553761683013
Epoch: 102 Loss: 0.005167374369002094
Epoch: 103 Loss: 0.005142390427767839
Epoch: 104 Loss: 0.005111879405663594
Epoch: 105 Loss: 0.005080515328843735
Epoch: 106 Loss: 0.005053704261387649
Epoch: 107 Loss: 0.005024040491614295
Epoch: 108 Loss: 0.0050014930863031435
Epoch: 109 Loss: 0.004972551397881226
Epoch: 110 Loss: 0.004949592593076982
Epoch: 111 Loss: 0.004931243694093274
Epoch: 112 Loss: 0.004914073396081987
Epoch: 113 Loss: 0.004897477331963417
Epoch: 114 Loss: 0.004879886082275526
Epoch: 115 Loss: 0.0048712065909057856
Epoch: 116 Loss: 0.004861685239072693
Epoch: 117 Loss: 0.004810073854107606
Epoch: 118 Loss: 0.0047602333851452724
Epoch: 119 Loss: 0.004725667862466683
Epoch: 120 Loss: 0.004702109573899131
Epoch: 121 Loss: 0.004698963786818479
Epoch: 122 Loss: 0.004681479890438679
Epoch: 123 Loss: 0.00465373447360961
Epoch: 124 Loss: 0.004622984365069945
Epoch: 125 Loss: 0.00459746058760701
Epoch: 126 Loss: 0.00456815584268617
Epoch: 127 Loss: 0.00455308286473155
Epoch: 128 Loss: 0.004534425667340034
Epoch: 129 Loss: 0.004529970900253638
Epoch: 130 Loss: 0.0045247458619996905
Epoch: 131 Loss: 0.00452761815885376
Epoch: 132 Loss: 0.004501524626424438
Epoch: 133 Loss: 0.004477846491346626
Epoch: 134 Loss: 0.004465730523837632
Epoch: 135 Loss: 0.004424516589527852
Epoch: 136 Loss: 0.004394411287074418
Epoch: 137 Loss: 0.004373801406472921
Epoch: 138 Loss: 0.0043819246072273116
Epoch: 139 Loss: 0.004373386364405681
Epoch: 140 Loss: 0.004349930040342243
Epoch: 141 Loss: 0.004323907064788632
Epoch: 142 Loss: 0.004313949832545691
Epoch: 143 Loss: 0.004293540673403952
Epoch: 144 Loss: 0.0042688613573677444
Epoch: 145 Loss: 0.004258572516080581
Epoch: 146 Loss: 0.004235431460026456
Epoch: 147 Loss: 0.004223497578335044
Epoch: 148 Loss: 0.004230814023972734
Epoch: 149 Loss: 0.004179965186937663
Epoch: 150 Loss: 0.004164194152077758
Epoch: 151 Loss: 0.00415104349437905
Epoch: 152 Loss: 0.004132375246722643
Epoch: 153 Loss: 0.004124466623914869
Epoch: 154 Loss: 0.004098175978900767
Epoch: 155 Loss: 0.004104969220382995
Epoch: 156 Loss: 0.004096569055323734
Epoch: 157 Loss: 0.004081240132156955
Epoch: 158 Loss: 0.004088646539266368
Epoch: 159 Loss: 0.004072069116917096
Epoch: 160 Loss: 0.004068531975223634
Epoch: 161 Loss: 0.004052481372031923
Epoch: 162 Loss: 0.004037613882438133
Epoch: 163 Loss: 0.004018848412670195
Epoch: 164 Loss: 0.004014721189282443
Epoch: 165 Loss: 0.00401932208210622
Epoch: 166 Loss: 0.0040196294063015985
Epoch: 167 Loss: 0.004014344889008881
Epoch: 168 Loss: 0.004003615614860074
Epoch: 169 Loss: 0.003991646272448921
Epoch: 170 Loss: 0.0039736719887801695
Epoch: 171 Loss: 0.003956104647123108
Epoch: 172 Loss: 0.003933592662705402
Epoch: 173 Loss: 0.003919733176620579
Epoch: 174 Loss: 0.003926500760778589
Epoch: 175 Loss: 0.00390103230203845
Epoch: 176 Loss: 0.0038978884051749972
Epoch: 177 Loss: 0.0038791752395857322
Epoch: 178 Loss: 0.0038715748998679614
Epoch: 179 Loss: 0.0038606975730018397
Epoch: 180 Loss: 0.003858911866126092
Epoch: 181 Loss: 0.00384702635806446
Epoch: 182 Loss: 0.003839805103397291
Epoch: 183 Loss: 0.003811580913239404
Epoch: 184 Loss: 0.0038111296843884417
Epoch: 185 Loss: 0.003821482894157893
Epoch: 186 Loss: 0.0038217060807112014
Epoch: 187 Loss: 0.003795064749857901
Epoch: 188 Loss: 0.003795253532620049
Epoch: 189 Loss: 0.0037792340766540484
Epoch: 190 Loss: 0.0037779423046710066
Epoch: 191 Loss: 0.0037577964371609453
Epoch: 192 Loss: 0.0037573065277875252
Epoch: 193 Loss: 0.0037496756137299692
Epoch: 194 Loss: 0.003761932661291212
Epoch: 195 Loss: 0.0037102583936709714
Epoch: 196 Loss: 0.0037038161014941963
Epoch: 197 Loss: 0.0036928649428055473
Epoch: 198 Loss: 0.00368466612723607
Epoch: 199 Loss: 0.0036835905170607332
In [10]:
# Plot the per-epoch training loss to visualize convergence.
fig, ax = plt.subplots()
ax.plot(errt)
ax.set_title('Convergence')
Out[10]:
Text(0.5, 1.0, 'Convergence')

Uploading test_x_01

In [11]:
# Load the first noisy test signal at its native rate and play it back at that
# rate (instead of a hard-coded 16000).
test_x_01, sr2 = librosa.load("test_x_01.wav", sr=None)
ipd.display(ipd.Audio(test_x_01, rate=sr2))

Performing STFT and taking absolute value of testx01

In [12]:
# STFT of the noisy test signal (same parameters as training) and its magnitude.
testx01 = librosa.stft(test_x_01, n_fft=1024, hop_length=512)
testx01_abs = np.abs(testx01)
print(testx01_abs.shape)
(513, 142)

Predicting outputs for test_x_01

In [13]:
# Run the trained network over all 142 frames of test_x_01's magnitude spectrum.
test_x_01_tensor=torch.tensor(testx01_abs)
test_x_01_transpose=torch.transpose(test_x_01_tensor, 0, 1).to(device)
# NOTE(review): resize_ on this non-contiguous transpose view reinterprets the
# underlying (frequency-major) storage, mirroring how the training batches were
# built — confirm this is the intended frame layout.
test_x_01_transpose.resize_(142,1, 513)
# No gradients needed at inference time.
with torch.no_grad(): 
  new_outputs_test_x_01=model1(test_x_01_transpose)
print(np.shape(new_outputs_test_x_01))
torch.Size([142, 513])

Recovered Signal for test_x_01

In [14]:
# Rebuild a complex spectrum from the predicted magnitudes: X/|X| is the noisy
# unit-phase term, scaled by the network's magnitude estimate.
new_outputs1 = torch.transpose(new_outputs_test_x_01, 0, 1)
# Build the complex STFT tensor once instead of twice.
testx01_tensor = torch.tensor(testx01, device=device)
new_output2 = torch.div(torch.mul(testx01_tensor, new_outputs1), torch.abs(testx01_tensor))
recovered_test_01_x = (new_output2.data).cpu().numpy()
# Invert the STFT once and reuse the result for playback.
signal_test_01_x = librosa.core.istft(recovered_test_01_x, hop_length=512)
ipd.display(ipd.Audio(signal_test_01_x, rate=16000))

Uploading test_x_02 signal

In [15]:
# Load the second noisy test signal at its native rate and play it back at that
# rate (instead of a hard-coded 16000).
test_x_02, sr2 = librosa.load("test_x_02.wav", sr=None)
ipd.display(ipd.Audio(test_x_02, rate=sr2))

Performing STFT and taking absolute value of testx02

In [16]:
# STFT of the second noisy test signal (same parameters as training) and its magnitude.
testx02 = librosa.stft(test_x_02, n_fft=1024, hop_length=512)
testx02_abs = np.abs(testx02)
print(testx02_abs.shape)
(513, 380)

Predicting outputs for test_x_02

In [17]:
test_x_02_tensor=torch.tensor(testx02_abs)
test_x_02_transpose=torch.transpose(test_x_02_tensor, 0, 1).to(device)
test_x_02_transpose.resize_(380,1, 513)
with torch.no_grad():
  new_outputs_test_x_02=model1(test_x_02_transpose)
print(np.shape(new_outputs_test_x_02))
torch.Size([380, 513])

Recovered Signal for test_x_02

In [18]:
# Rebuild the complex spectrum: predicted magnitude times the noisy phase X/|X|.
new_output11 = torch.transpose(new_outputs_test_x_02, 0, 1)
# Build the complex STFT tensor once instead of twice.
testx02_tensor = torch.tensor(testx02, device=device)
new_output12 = torch.div(torch.mul(testx02_tensor, new_output11), torch.abs(testx02_tensor))
recovered_test_02_x = (new_output12.data).cpu().numpy()
# Invert the STFT once and reuse the result for playback.
signal_test_02_x = librosa.core.istft(recovered_test_02_x, hop_length=512)
ipd.display(ipd.Audio(signal_test_02_x, rate=16000))

Code For SNR Calculation

In [19]:
def snr(ground_clean, recovered_one):
  """Signal-to-noise ratio (dB) of a recovered signal against a clean reference.

  Both signals are truncated to their common length before comparison, and
  array-likes (e.g. plain lists) are accepted as well as numpy arrays.

  Args:
    ground_clean: 1-D array-like clean reference signal.
    recovered_one: 1-D array-like recovered/denoised signal.

  Returns:
    SNR in decibels: 10 * log10(sum(s^2) / sum((s - s_hat)^2)).
  """
  n = min(len(ground_clean), len(recovered_one))
  ground1 = np.asarray(ground_clean[:n], dtype=float)
  recovered1 = np.asarray(recovered_one[:n], dtype=float)
  num = np.sum(np.square(ground1))
  diff = np.sum(np.square(ground1 - recovered1))
  # np.log10 is clearer (and marginally more accurate) than math.log(x, 10).
  return 10 * np.log10(num / diff)
In [20]:
# Run the trained network over the full noisy TRAINING spectrogram (2459 frames).
# NOTE(review): despite the names, mod_S_tensor / mod_S_t hold the NOISY
# magnitudes (mod_X) — which is the correct network input; the names are
# misleading, not the data.
mod_S_tensor=torch.tensor(mod_X)
mod_S_t=torch.transpose(mod_S_tensor, 0, 1).to(device)
# NOTE(review): resize_ on a non-contiguous transpose view reinterprets the
# underlying storage, matching how the training batches were constructed — confirm.
mod_S_t.resize_(2459,1, 513)
# No gradients needed at inference time.
with torch.no_grad(): 
  new_outputs=model1(mod_S_t)
# Back to (freq_bins, frames) to line up with the complex STFT X below.
new_outputs13=torch.transpose(new_outputs,0,1)

Recovering Clean Output for Train Signal

In [21]:
# Rebuild the complex spectrum: predicted magnitude times the noisy phase X/|X|.
# Build the complex STFT tensor once instead of twice.
X_tensor = torch.tensor(X, device=device)
new_output12 = torch.div(torch.mul(X_tensor, new_outputs13), torch.abs(X_tensor))
recovered_test_03_x = (new_output12.data).cpu().numpy()
# Invert the STFT once and reuse it for both playback and the SNR check below.
signal_test_03_x = librosa.core.istft(recovered_test_03_x, hop_length=512)
ipd.display(ipd.Audio(signal_test_03_x, rate=16000))
In [22]:
# SNR (dB) of the denoised training output against the clean reference
# (displayed as the cell's output below).
snr(train_clean_male,signal_test_03_x)
Out[22]:
14.378729480370753